Time series clustering partitions time series data into groups based on similarity or distance, so that the time series within the same cluster are similar to each other.
Methodology followed:
# https://gist.github.com/Zsailer/5d1f4e357c78409dd9a5a4e5c61be552
# Notebook-display helper: injects a small jQuery snippet into the rendered
# notebook that lets the reader toggle the visibility of this cell's raw code
# input. Purely cosmetic; no effect on the analysis below.
from IPython.display import HTML
from IPython.display import display
tag = HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.cell.code_cell.rendered.selected div.input').hide();
} else {
$('div.cell.code_cell.rendered.selected div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
To show/hide this cell's raw code input, click <a href="javascript:code_toggle()">here</a>.''')
# Render the toggle link in the notebook output area.
display(tag)
def load_one_dataset(direc = 'data', dataset="EMG", filename = "."):
    """
    Load a single CSV data file.

    direc: root data directory.
    dataset: sub-directory name under `direc`.
    filename: CSV file to read; each row is [label, t1, t2, ..., tN].

    Return:
        X: segments * (seq length * channels) * 1 ndarray (columns 1..N).
        y: segments * 1 ndarray of labels (column 0).
    """
    datadir = direc + '/' + dataset + '/' + filename
    data = np.loadtxt(datadir, delimiter=',')
    # BUG FIX: dropped `data = np.concatenate((data, ), axis=0)` — a one-tuple
    # concatenate is a no-op copy.
    # Append a trailing channel axis: (segments, cols) -> (segments, cols, 1).
    data = np.expand_dims(data, -1)
    # Column 0 is the label; the remaining columns are the time-series values.
    return data[:, 1:, :], data[:, 0, :]
def load_data(direc = 'data', dataset="EMG", all_file = None, do_pca = False, single_channel = None,
              batch_size = 32, seq_len = 10, pca_component = 6):
    """
    Load all dataset files and preprocess.

    direc: root data directory.
    dataset: sub-directory name under `direc`.
    all_file: list of filenames to load. Each yields a
        segments * (seq length * channel amount) * 1 ndarray.
    do_pca: perform PCA on training data. If True, return original and
        transformed data, and the fitted PCA object. If False, return
        original data only.
    single_channel: list of channels to keep, 1-based. If None, keep all
        channels. Mutually exclusive with do_pca.
    batch_size: the segment count is truncated down to a multiple of this.
    seq_len: should be the same as when converting to vrae format.
    pca_component: number of principal components kept when do_pca is True.

    Return:
        X_train: data in segments * seq length * features ndarray.
        y_train: labels in segments * 1 ndarray.
        X_train_ori: X_train before PCA (only when do_pca is True).
        X_pca: fitted PCA object (only when do_pca is True).
    """
    # Avoid a mutable default argument; None means "no files".
    if all_file is None:
        all_file = []
    # Load every dataset in the list.
    X_train = []
    y_train = []
    for file in all_file:
        # BUG FIX: previously hard-coded direc='data', dataset="EMG" here,
        # silently ignoring the caller's `direc`/`dataset` arguments.
        X_train_small, y_train_small = load_one_dataset(direc = direc, dataset = dataset, filename = file)
        X_train.append(X_train_small)
        y_train.append(y_train_small)
        print(f'Loading {file}, X shape {X_train_small.shape}, y shape {y_train_small.shape}', end = '')
        print(f', has label {np.unique(y_train_small)}')
    # Concatenate into one np array.
    if len(all_file) == 1:
        X_train = X_train[0]
        y_train = y_train[0]
    else:
        X_train = np.concatenate(X_train, axis = 0)
        y_train = np.concatenate(y_train, axis = 0)
    # Reshape to (segments, seq_len, features) and drop the trailing segments
    # so the count is an exact multiple of batch_size.
    X_train = X_train.reshape(X_train.shape[0], seq_len, -1)
    num_seg = (X_train.shape[0] // batch_size) * batch_size
    X_train = X_train[:num_seg, :, :]
    y_train = y_train[:num_seg, :]
    # PCA and single-channel extraction are mutually exclusive.
    if do_pca and single_channel:
        raise ValueError("Don't do both pca and single channel.")
    if do_pca:
        print('Doing PCA')
        # Keep a copy of the pre-PCA data for later comparison.
        X_train_ori = np.copy(X_train)
        # First fit with 15 components purely to report explained variance.
        temp = X_train.reshape(-1, X_train.shape[2])
        X_pca = PCA(n_components=15).fit(temp)
        print(f'Explained variance ratio: {np.cumsum(X_pca.explained_variance_ratio_)}')
        # Refit with the requested n_components so inverse_transform later
        # reconstructs from exactly `pca_component` components.
        X_pca = PCA(n_components = pca_component).fit(temp)
        X_train = X_pca.transform(temp)
        X_train = X_train.reshape(-1, seq_len, pca_component)
    # Extract the requested channels (convert from 1-based to 0-based).
    if single_channel:
        print(f'Extracting channels {single_channel}')
        single_channel = np.array(single_channel)-1
        X_train = X_train[:, :, single_channel]
    print(f'Dataset shape: {X_train.shape}')
    print(f'Label: {np.unique(y_train)}, shape: {y_train.shape}')
    if do_pca:
        return X_train, y_train, X_train_ori, X_pca
    return X_train, y_train
def recon(model, dataset):
    """
    Pass a dataset through the trained VRAE to obtain its reconstruction.

    model: trained vrae model (must expose `reconstruct`).
    dataset: original data in segments * seq length * features ndarray.

    Return:
        reconstruction: segments * seq length * features ndarray.
    """
    torch_data = TensorDataset(torch.from_numpy(dataset))
    # BUG FIX: previously called the global `vrae` instead of the `model`
    # argument, making the parameter dead.
    reconstruction = model.reconstruct(torch_data)
    # reconstruct() apparently returns (seq_len, segments, features);
    # swap the first two axes to match `dataset`'s layout.
    reconstruction = reconstruction.transpose((1, 0, 2))
    return reconstruction
def plot_recon_feature(dataset, reconstruction, idx = None):
    """
    Plot the original and reconstructed features of one segment.

    dataset: original data in segments * seq length * features ndarray.
    reconstruction: reconstructed data in the same shape.
    idx: 1-based index of the segment to plot. If None (or 0), a segment is
        chosen at random.
    """
    num_seq = dataset.shape[0]
    num_features = dataset.shape[2]
    # Ceiling division: five feature panels per row.
    num_rows = -(-num_features // 5)
    if idx:
        idx = idx - 1
    else:
        # NOTE(review): the -1 maps a random 0-based draw to [-1, num_seq-2];
        # -1 wraps to the last segment, so coverage is still uniform over all
        # segments — kept as-is to preserve behavior.
        idx = np.random.choice(num_seq, 1)[0] - 1
    # BUG FIX: squeeze=False keeps axs 2-D even with a single row, so the
    # axs[row, col] indexing below works for any feature count (previously
    # crashed for num_features <= 5).
    fig, axs = plt.subplots(num_rows, 5, figsize = (20, num_rows*5), squeeze=False)
    for ii in range(num_features):
        ori = dataset[idx, :, ii]
        rec = reconstruction[idx, :, ii]
        axs[ii//5, ii%5].plot(ori, color = 'black')
        axs[ii//5, ii%5].plot(rec, color = 'red')
        axs[ii//5, ii%5].set_title(f'Feature #{ii+1}')
    # (Also fixes the "sequesce" typo in the title.)
    fig.suptitle(f'Ori and rec of sequence # {idx+1}', size = 20)
    plt.show()
def plot_recon_metrics(dataset, reconstruction, x_lim = None, verbose = True):
    """
    Plot per-feature correlation, MSE and mean over segments.

    dataset: original data in segments * seq length * features ndarray.
    reconstruction: reconstructed data in the same shape as dataset.
    x_lim: optional [start, stop) segment range to restrict the metrics to.
    verbose: if True, also print a per-channel summary.
    """
    if x_lim:
        # Restrict to a window of segments (slices the first axis).
        dataset = dataset[x_lim[0]:x_lim[1], :]
        reconstruction = reconstruction[x_lim[0]:x_lim[1], :]
    num_seq = dataset.shape[0]
    num_features = dataset.shape[2]
    # Per-segment, per-feature error and mean, averaged over time.
    mse_all = ((dataset-reconstruction)**2).mean(axis=1)
    mean_all = np.mean(dataset, axis = 1)
    # Per-segment Pearson correlation between original and reconstruction.
    corr_all = []
    for ii in range(num_features):
        corr_channel = []
        for jj in range(num_seq):
            corr_seq = np.corrcoef(reconstruction[jj, :, ii], dataset[jj, :, ii])[0,1]
            corr_channel.append(corr_seq)
        corr_channel = np.array(corr_channel)
        corr_all.append(corr_channel)
    corr_all = np.array(corr_all).transpose()
    if num_features == 1:
        # Two rows so `axs` stays indexable even with a single feature.
        fig, axs = plt.subplots(2, 1, figsize = (20, 12))
    else:
        fig, axs = plt.subplots(num_features, 1, figsize = (20, num_features*6))
    # Scale factor so correlation is visible on the same axis as the MSE.
    times = np.max(mse_all, axis = 0)
    for ii in range(num_features):
        axs[ii].plot(corr_all[:,ii]*times[ii]/3, color = 'r', label = 'corr')
        axs[ii].plot(mse_all[:, ii], color = 'y', label = 'mse')
        axs[ii].plot(mean_all[:, ii], color = 'dimgray', label = 'mean', alpha = 0.7)
        axs[ii].set_title(f'# {ii+1}, mean corr = {np.mean(corr_all[:,ii]):.4f}, ' \
                          f'mean mse = {np.mean(mse_all[:, ii]):.4f}, ' \
                          f'mean = {np.mean(mean_all[:, ii]):.4f}')
        axs[ii].legend()
    if verbose:
        corr_mean = np.mean(corr_all, axis = 0)
        mse_mean = np.mean(mse_all, axis = 0)
        mean_mean = np.mean(mean_all, axis = 0)
        for jj in range(num_features):
            # BUG FIX: was '{mse_mean[jj]:4f}' (minimum field width 4 with the
            # default 6 decimals); '.4f' matches the 4-decimal precision used
            # for every other field.
            print(f'Channel {jj+1}, corr = {corr_mean[jj]:.4f}, '\
                  f'mse = {mse_mean[jj]:.4f}, mean = {mean_mean[jj]:.4f}.')
def pca_inverse(PCA_obj, reconstruction):
    """
    Convert reconstructed principal components back to channels.

    PCA_obj: fitted PCA (any object exposing `inverse_transform`).
    reconstruction: segments * seq length * n_components ndarray.

    Return:
        segments * seq length * n_channels ndarray.
    """
    seq_len = reconstruction.shape[1]
    num_features = reconstruction.shape[2]
    # Flatten segments/time so PCA sees one sample per time step.
    reconstruction = reconstruction.reshape(-1, num_features)
    recon_channel = PCA_obj.inverse_transform(reconstruction)
    # BUG FIX: the channel count was hard-coded to 15; derive it from the
    # inverse-transformed data so any channel count works.
    recon_channel = recon_channel.reshape(-1, seq_len, recon_channel.shape[-1])
    return recon_channel
def visualize(z_run, y, inv_bhvs, one_in = 4, perplexity=80, n_iter=3000):
    """
    Visualize the latent space with PCA (left panel) and t-SNE (right panel).

    z_run: latent values, n_segments * latent length ndarray.
    y: label of each segment.
    inv_bhvs: dict mapping numeric label -> behavior name.
    one_in: keep one in every `one_in` segments (default 4).
    perplexity, n_iter: t-SNE settings.
    """
    # Subsample to keep the embeddings tractable.
    sampled = z_run[::one_in, :]
    label = y[::one_in, :]
    # Two 2-D embeddings of the same subsample.
    # z_run_pca = TruncatedSVD(n_components=2).fit_transform(sampled)
    embeddings = (
        PCA(n_components=2).fit_transform(sampled),
        TSNE(perplexity=perplexity, min_grad_norm=1E-12, n_iter=n_iter).fit_transform(sampled),
    )
    palette = ['b','g','r','c','m','y','darkgrey']
    fig, axs = plt.subplots(1,2, figsize=(20,10))
    # One scatter per label per panel; label -1 wraps to the last color.
    for lab in np.unique(label):
        lab = int(lab)
        mask = label == lab
        for ax, emb in zip(axs, embeddings):
            xs = emb[:,0].reshape(-1,1)[mask]
            ys = emb[:,1].reshape(-1,1)[mask]
            ax.scatter(xs, ys, c=palette[lab], marker='.', label = inv_bhvs[lab], linewidths=None)
    axs[0].set_title('PCA on z_run')
    axs[1].set_title('tSNE on z_run')
    axs[0].legend()
    axs[1].legend()
    plt.show()
Defined helper functions: load_one_dataset, load_data, recon, plot_recon_feature, plot_recon_metrics, pca_inverse, visualize.
from vrae.vrae import VRAE
from vrae.utils import *
import numpy as np
import torch
import pickle
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.metrics import mean_squared_error as mse
import plotly
from torch.utils.data import DataLoader, TensorDataset
plotly.offline.init_notebook_mode()
%load_ext autoreload
%autoreload 2
# Directory where model checkpoints and latent vectors are saved.
dload = './model_dir'
# ---- VRAE hyper-parameters ----
seq_len = 10
hidden_size = 256
hidden_layer_depth = 3
latent_length = 16
batch_size = 16
learning_rate = 0.00002
n_epochs = 1500
dropout_rate = 0.0
optimizer = 'Adam' # options: ADAM, SGD
cuda = True # options: True, False
print_every=10
clip = True # options: True, False
max_grad_norm=5
loss = 'MSELoss' # options: SmoothL1Loss, MSELoss
block = 'LSTM' # options: LSTM, GRU
output = True
# Training recordings; each entry is one cage session file.
training_file = ['20201020_Pop_Cage_001','20201020_Pop_Cage_002','20201020_Pop_Cage_003','20201020_Pop_Cage_004',
'20201020_Pop_Cage_006']
# NOTE(review): do_pca=False here, yet a later cell references X_pca and
# X_train_ori, which only exist when do_pca=True — confirm intended workflow.
X_train, y_train = load_data(direc = 'data', dataset="EMG", all_file = training_file,
do_pca = False, single_channel = None,
batch_size = batch_size, seq_len = seq_len, pca_component = 6)
# Wrap the training array as a torch TensorDataset for the VRAE.
train_dataset = TensorDataset(torch.from_numpy(X_train))
Loading 20201020_Pop_Cage_001, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3.] Loading 20201020_Pop_Cage_002, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3.] Loading 20201020_Pop_Cage_003, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Loading 20201020_Pop_Cage_004, X shape (3601, 150, 1), y shape (3601, 1), has label [-1. 0. 1. 2. 3. 4.] Loading 20201020_Pop_Cage_006, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Dataset shape: (17984, 10, 15) Label: [-1. 0. 1. 2. 3. 4.], shape: (17984, 1)
# Number of features per time step (raw channels, or PCs when do_pca=True).
num_features = X_train.shape[2]
VRAE inherits from sklearn.base.BaseEstimator and overrides fit, transform and fit_transform functions, similar to sklearn modules
# Instantiate the VRAE with the hyper-parameters defined above.
vrae = VRAE(sequence_length=seq_len,
number_of_features = num_features,
hidden_size = hidden_size,
hidden_layer_depth = hidden_layer_depth,
latent_length = latent_length,
batch_size = batch_size,
learning_rate = learning_rate,
n_epochs = n_epochs,
dropout_rate = dropout_rate,
optimizer = optimizer,
cuda = cuda,
print_every=print_every,
clip=clip,
max_grad_norm=max_grad_norm,
loss = loss,
block = block,
dload = dload,
output = output)
/home/roton2/miniconda3/envs/emg/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
#vrae.fit(train_dataset)
#If the model has to be saved, with the learnt parameters use:
# Train the VRAE on the prepared training dataset (runs n_epochs epochs).
vrae.fit(train_dataset)
Epoch: 9 Average loss: 2753808.3089 Epoch: 19 Average loss: 2750278.1112 Epoch: 29 Average loss: 2750297.7673 Epoch: 39 Average loss: 2750353.3879 Epoch: 49 Average loss: 2750331.5382 Epoch: 59 Average loss: 2750281.1842 Epoch: 69 Average loss: 2750234.1768 Epoch: 79 Average loss: 2750209.5085 Epoch: 89 Average loss: 2750294.7559 Epoch: 99 Average loss: 2750361.5524 Epoch: 109 Average loss: 2750166.1496 Epoch: 119 Average loss: 2750217.1616 Epoch: 129 Average loss: 2750247.0775 Epoch: 139 Average loss: 2750212.1312 Epoch: 149 Average loss: 2750199.1081 Epoch: 159 Average loss: 2750444.3260 Epoch: 169 Average loss: 2750141.7020 Epoch: 179 Average loss: 2750302.6312 Epoch: 189 Average loss: 2750208.5589 Epoch: 199 Average loss: 2750238.2619 Epoch: 209 Average loss: 2750216.9125 Epoch: 219 Average loss: 2750161.4843 Epoch: 229 Average loss: 2750142.7226 Epoch: 239 Average loss: 2750161.9488 Epoch: 249 Average loss: 2750082.5606 Epoch: 259 Average loss: 2750162.9351 Epoch: 269 Average loss: 2750243.2284 Epoch: 279 Average loss: 2750153.9770 Epoch: 289 Average loss: 2750175.4538 Epoch: 299 Average loss: 2750157.6986 Epoch: 309 Average loss: 2750143.6431 Epoch: 319 Average loss: 2750139.4170 Epoch: 329 Average loss: 2750066.9041 Epoch: 339 Average loss: 2750043.8505 Epoch: 349 Average loss: 2750008.7509 Epoch: 359 Average loss: 2750107.2509 Epoch: 369 Average loss: 2750113.3483 Epoch: 379 Average loss: 2750005.0673 Epoch: 389 Average loss: 2750098.3291 Epoch: 399 Average loss: 2749921.3002 Epoch: 409 Average loss: 2750004.5864 Epoch: 419 Average loss: 2749913.9580 Epoch: 429 Average loss: 2750024.8535 Epoch: 439 Average loss: 2749922.6714 Epoch: 449 Average loss: 2750018.7806 Epoch: 459 Average loss: 2749932.9929 Epoch: 469 Average loss: 2749950.1430 Epoch: 479 Average loss: 2750041.0043 Epoch: 489 Average loss: 2749941.3907 Epoch: 499 Average loss: 2749812.5983 Epoch: 509 Average loss: 2749868.7103 Epoch: 519 Average loss: 2749899.0323 Epoch: 529 Average loss: 
2749947.1249 Epoch: 539 Average loss: 2749959.9289 Epoch: 549 Average loss: 2749854.7535 Epoch: 559 Average loss: 2749911.4662 Epoch: 569 Average loss: 2749872.8976 Epoch: 579 Average loss: 2749784.8443 Epoch: 589 Average loss: 2749878.7559 Epoch: 599 Average loss: 2749799.3576 Epoch: 609 Average loss: 2749882.0560 Epoch: 619 Average loss: 2749838.9423 Epoch: 629 Average loss: 2749748.3564 Epoch: 639 Average loss: 2749911.1562 Epoch: 649 Average loss: 2749842.4957 Epoch: 659 Average loss: 2749814.5493 Epoch: 669 Average loss: 2749899.8456 Epoch: 679 Average loss: 2749835.2120 Epoch: 689 Average loss: 2749818.8216 Epoch: 699 Average loss: 2749816.9470 Epoch: 709 Average loss: 2749830.3742 Epoch: 719 Average loss: 2749792.4255 Epoch: 729 Average loss: 2749784.9643 Epoch: 739 Average loss: 2749908.1664 Epoch: 749 Average loss: 2749717.4285 Epoch: 759 Average loss: 2749737.0973 Epoch: 769 Average loss: 2749774.7644 Epoch: 779 Average loss: 2749705.6388 Epoch: 789 Average loss: 2749777.5391 Epoch: 799 Average loss: 2749666.5713 Epoch: 809 Average loss: 2749659.3738 Epoch: 819 Average loss: 2749745.1945 Epoch: 829 Average loss: 2749686.9721 Epoch: 839 Average loss: 2749808.9118 Epoch: 849 Average loss: 2749682.7129 Epoch: 859 Average loss: 2749659.9625 Epoch: 869 Average loss: 2749673.8476 Epoch: 879 Average loss: 2749580.2711 Epoch: 889 Average loss: 2749559.1805 Epoch: 899 Average loss: 2749611.3613 Epoch: 909 Average loss: 2749634.7018 Epoch: 919 Average loss: 2749604.9360 Epoch: 929 Average loss: 2749628.8327 Epoch: 939 Average loss: 2749583.0692 Epoch: 949 Average loss: 2749550.6931 Epoch: 959 Average loss: 2749592.5402 Epoch: 969 Average loss: 2749563.5217 Epoch: 979 Average loss: 2749557.5431 Epoch: 989 Average loss: 2749544.0610 Epoch: 999 Average loss: 2749526.9655 Epoch: 1009 Average loss: 2749596.9270 Epoch: 1019 Average loss: 2749592.6955 Epoch: 1029 Average loss: 2749549.8949 Epoch: 1039 Average loss: 2749465.2425 Epoch: 1049 Average loss: 2749485.4544 
Epoch: 1059 Average loss: 2749436.1545 Epoch: 1069 Average loss: 2749427.9078 Epoch: 1079 Average loss: 2749492.2951 Epoch: 1089 Average loss: 2749611.0618 Epoch: 1099 Average loss: 2749435.2493 Epoch: 1109 Average loss: 2749398.7122 Epoch: 1119 Average loss: 2749379.5794 Epoch: 1129 Average loss: 2749416.0557 Epoch: 1139 Average loss: 2749510.3720 Epoch: 1149 Average loss: 2749447.2226 Epoch: 1159 Average loss: 2749428.6791 Epoch: 1169 Average loss: 2749304.8746 Epoch: 1179 Average loss: 2749481.9073 Epoch: 1189 Average loss: 2749335.4777 Epoch: 1199 Average loss: 2749381.2110 Epoch: 1209 Average loss: 2749407.7744 Epoch: 1219 Average loss: 2749459.6749 Epoch: 1229 Average loss: 2749459.9669 Epoch: 1239 Average loss: 2749348.6435 Epoch: 1249 Average loss: 2749357.6178 Epoch: 1259 Average loss: 2749351.0322 Epoch: 1269 Average loss: 2749327.9076 Epoch: 1279 Average loss: 2749260.2022 Epoch: 1289 Average loss: 2749274.9669 Epoch: 1299 Average loss: 2749341.1099 Epoch: 1309 Average loss: 2749426.3793 Epoch: 1319 Average loss: 2749267.6621 Epoch: 1329 Average loss: 2749288.8320 Epoch: 1339 Average loss: 2749317.9895 Epoch: 1349 Average loss: 2749212.0553 Epoch: 1359 Average loss: 2749235.5993 Epoch: 1369 Average loss: 2749244.5072 Epoch: 1379 Average loss: 2749246.2757 Epoch: 1389 Average loss: 2749358.0459 Epoch: 1399 Average loss: 2749232.0365 Epoch: 1409 Average loss: 2749331.2506 Epoch: 1419 Average loss: 2749331.1114 Epoch: 1429 Average loss: 2749203.6857 Epoch: 1439 Average loss: 2749198.2521 Epoch: 1449 Average loss: 2749241.7243 Epoch: 1459 Average loss: 2749234.5636 Epoch: 1469 Average loss: 2749176.9709 Epoch: 1479 Average loss: 2749290.6212 Epoch: 1489 Average loss: 2749274.4290 Epoch: 1499 Average loss: 2749194.6113
# Loss history recorded during fit() — presumably total loss per step/epoch.
plt.plot(vrae.all_loss)
[<matplotlib.lines.Line2D at 0x7fe7241c6670>]
# Reconstruction-MSE history recorded during fit() — presumably the
# reconstruction component of the loss; verify against the VRAE implementation.
plt.plot(vrae.rec_mse)
[<matplotlib.lines.Line2D at 0x7fe71efb6f40>]
#If the latent vectors have to be saved, pass the parameter `save`
# Encode the training set into latent vectors and pickle them to disk.
z_run = vrae.transform(train_dataset, save = True, filename = 'z_run_e57_out_1500epoch.pkl')
z_run.shape
(17984, 16)
# Save the freshly trained weights.
vrae.save('./vrae_e57_out_1500epoch.pth')
# NOTE(review): this immediately loads a *different* checkpoint
# (vrae_e5_3000epoch.pth), discarding the weights just trained and saved —
# confirm this is intended.
vrae.load(dload+'/vrae_e5_3000epoch.pth')
# with open(dload+'/z_run_e57pca_2000epoch.pkl', 'rb') as fh:
# z_run = pickle.load(fh)
# Reconstruct the training data and compare against the originals.
reconstruction = recon(vrae, X_train)
plot_recon_feature(X_train, reconstruction, idx = None)
plot_recon_metrics(X_train, reconstruction, x_lim = [2000, 4000])
Channel 1, corr = -0.0038, mse = 628.675829, mean = 32.3581. Channel 2, corr = -0.0115, mse = 411.133805, mean = 28.7847. Channel 3, corr = -0.0055, mse = 434.559996, mean = 30.8992. Channel 4, corr = -0.0056, mse = 224.986640, mean = 20.1233. Channel 5, corr = -0.0164, mse = 139.837401, mean = 13.5944. Channel 6, corr = -0.0043, mse = 1092.944435, mean = 34.9307. Channel 7, corr = -0.0057, mse = 1986.438745, mean = 48.0828. Channel 8, corr = 0.0007, mse = 1964.232910, mean = 55.4692. Channel 9, corr = -0.0035, mse = 303.164925, mean = 23.7708. Channel 10, corr = -0.0164, mse = 573.538220, mean = 28.5548. Channel 11, corr = -0.0032, mse = 2619.802299, mean = 47.2827. Channel 12, corr = -0.0140, mse = 313.059530, mean = 21.3178. Channel 13, corr = -0.0050, mse = 1631.443631, mean = 45.9477. Channel 14, corr = -0.0004, mse = 1432.719750, mean = 38.7298. Channel 15, corr = 0.0038, mse = 1151.843008, mean = 33.5783.
# NOTE(review): X_pca and X_train_ori only exist when load_data was called
# with do_pca=True; with the do_pca=False load above this cell raises
# NameError — confirm the intended PCA workflow.
recon_channel = pca_inverse(X_pca, reconstruction)
plot_recon_feature(X_train_ori, recon_channel, idx = None)
plot_recon_metrics(X_train_ori, recon_channel, x_lim = [0, 2000])
# Held-out sessions for testing.
testing_file = ['20201020_Pop_Cage_005','20201020_Pop_Cage_007']
X_test, y_test = load_data(direc = 'data', dataset="EMG", all_file = testing_file,
do_pca = False, single_channel = None,
batch_size = batch_size, seq_len = seq_len, pca_component = 6)
Loading 20201020_Pop_Cage_005, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 5.] Loading 20201020_Pop_Cage_007, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Dataset shape: (7184, 10, 15) Label: [-1. 0. 1. 2. 3. 4. 5.], shape: (7184, 1)
# Reconstruct the test set and inspect reconstruction quality.
recon_test = recon(vrae, X_test)
# recon_channel_test = pca_inverse(test_pca, recon_test)
plot_recon_feature(X_test, recon_test, idx = None)
plot_recon_metrics(X_test, recon_test, x_lim = [0, 2000])
Channel 1, corr = -0.0132, mse = 945.206654, mean = 32.4938. Channel 2, corr = -0.0176, mse = 539.238635, mean = 28.8226. Channel 3, corr = -0.0173, mse = 553.454450, mean = 33.7234. Channel 4, corr = -0.0034, mse = 234.683728, mean = 20.6514. Channel 5, corr = -0.0059, mse = 147.356018, mean = 13.4626. Channel 6, corr = -0.0018, mse = 1098.660772, mean = 31.9634. Channel 7, corr = -0.0102, mse = 2819.766440, mean = 51.8214. Channel 8, corr = -0.0119, mse = 2639.944595, mean = 57.4305. Channel 9, corr = -0.0180, mse = 341.061941, mean = 22.3521. Channel 10, corr = -0.0141, mse = 806.485379, mean = 32.1617. Channel 11, corr = -0.0028, mse = 2765.536851, mean = 47.7612. Channel 12, corr = -0.0086, mse = 356.829789, mean = 21.8368. Channel 13, corr = -0.0139, mse = 2509.619079, mean = 53.2006. Channel 14, corr = -0.0033, mse = 2203.700215, mean = 42.0137. Channel 15, corr = -0.0070, mse = 2038.722618, mean = 39.3028.
# Repeat the evaluation on a single test session.
testing_file = ['20201020_Pop_Cage_005']
X_test, y_test = load_data(direc = 'data', dataset="EMG", all_file = testing_file,
do_pca = False, single_channel = None,
batch_size = batch_size, seq_len = seq_len, pca_component = 6)
Loading 20201020_Pop_Cage_005, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 5.] Dataset shape: (3584, 10, 15) Label: [-1. 0. 1. 2. 3. 5.], shape: (3584, 1)
# Reconstruction-quality plots for the single-session test set.
recon_test = recon(vrae, X_test)
plot_recon_feature(X_test, recon_test, idx = None)
plot_recon_metrics(X_test, recon_test, x_lim = [0, 2000])
# Behavior name -> numeric label, matching the labels found in the y arrays.
bhvs = {'crawling': np.array([0]),
'high picking treats': np.array([1]),
'low picking treats': np.array([2]),
'pg': np.array([3]),
'sitting still': np.array([4]),
'grooming': np.array([5]),
'no_behavior': np.array([-1])}
# Inverse map: numeric label -> behavior name, used for plot legends.
inv_bhvs = {int(v): k for k, v in bhvs.items()}
# Latent-space visualization (PCA + t-SNE) of the training latents.
visualize(z_run, y = y_train, inv_bhvs = inv_bhvs)
# Encode the test set and visualize its latents the same way.
test_dataset = TensorDataset(torch.from_numpy(X_test))
z_run_test = vrae.transform(test_dataset, save = False)
visualize(z_run_test, y = y_test, inv_bhvs = inv_bhvs, one_in = 4)
# # Create clusters.annot
# from sklearn.mixture import GaussianMixture
# # Predict cluster assignments
# gm = GaussianMixture(n_components=5, random_state=0).fit(z_run)
# clusters = gm.predict(z_run)
# # Number of seconds in each sequence
# filt_time_step = 0.025
# num_secs_seq = sequence_length * filt_time_step
# end_time = len(z_run) * num_secs_seq + num_secs_seq
# # Print head of the file
# f = open ('Pop01-06_18_2021.annot','w')
# # write the header--------------------
# f.write('Bento annotation file\n')
# f.write('Movie file(s): {}\n\n'.format('Pop_20210618_cage_C1_01.avi'))
# f.write('{0} {1}\n'.format('Stimulus name:',''))
# f.write('{0} {1}\n'.format('Annotation start frame:',1))
# f.write('{0} {1}\n'.format('Annotation stop frame:', 26994))
# f.write('{0} {1}\n'.format('Annotation framerate:', 30))
# f.write('\n{0}\n'.format('List of channels:'))
# channels = ['cluster_num']
# for item in channels:
# f.write('{0}\n'.format(item))
# f.write('\n');
# f.write('{0}\n'.format('List of annotations:'))
# clust_names = ['cluster_{}'.format(str(num)) for num in set(clusters)]
# labels = clust_names
# # labels = [item.replace(' ','_') for item in labels]
# for item in labels:
# f.write('{0}\n'.format(item))
# f.write('\n')
# # now write the contents---------------
# for ch in channels:
# f.write('{0}----------\n'.format(ch))
# for beh in labels:
# f.write('>{0}\n'.format(beh))
# f.write('{0}\t {1}\t {2} \n'.format('Start','Stop','Duration'))
# idxs = np.where(clusters == int(beh.split('_')[-1]))[0]
# for hit in idxs:
# start_time = hit * num_secs_seq/2
# end_time = start_time + num_secs_seq
# f.write('{0}\t{1}\t{2}\n'.format(start_time, end_time, num_secs_seq))
# f.write('\n')
# f.write('\n')
# f.close()